# read.csv or readr::read_csv
# downloader package or curl
#
# manifestoR
# manifestoR
library(manifestoR)
# retrieve API key stored in .Rprofile
mp_setapikey(key = getOption("manifesto_key"))

# fetch the main dataset; the outer parentheses print the assigned value
(mpds <- mp_maindataset())
## Connecting to Manifesto Project DB API...
## Connecting to Manifesto Project DB API... corpus version: 2017-1
## # A tibble: 4,214 x 173
## country countryname oecdmember eumember edate date party
## <dbl> <chr> <dbl> <dbl> <date> <dbl> <dbl>
## 1 11 Sweden 0 0 1944-09-17 194409 11220
## 2 11 Sweden 0 0 1944-09-17 194409 11320
## 3 11 Sweden 0 0 1944-09-17 194409 11420
## 4 11 Sweden 0 0 1944-09-17 194409 11620
## 5 11 Sweden 0 0 1944-09-17 194409 11810
## 6 11 Sweden 0 0 1948-09-19 194809 11220
## 7 11 Sweden 0 0 1948-09-19 194809 11320
## 8 11 Sweden 0 0 1948-09-19 194809 11420
## 9 11 Sweden 0 0 1948-09-19 194809 11620
## 10 11 Sweden 0 0 1948-09-19 194809 11810
## # ... with 4,204 more rows, and 166 more variables: partyname <chr>,
## # partyabbrev <chr>, parfam <dbl>, coderid <dbl>, manual <dbl>,
## # coderyear <dbl>, testresult <dbl>, testeditsim <dbl>, pervote <dbl>,
## # voteest <dbl>, presvote <dbl>, absseat <dbl>, totseats <dbl>,
## # progtype <dbl>, datasetorigin <dbl>, corpusversion <chr>, total <dbl>,
## # peruncod <dbl>, per101 <dbl>, per102 <dbl>, per103 <dbl>,
## # per104 <dbl>, per105 <dbl>, per106 <dbl>, per107 <dbl>, per108 <dbl>,
## # per109 <dbl>, per110 <dbl>, per201 <dbl>, per202 <dbl>, per203 <dbl>,
## # per204 <dbl>, per301 <dbl>, per302 <dbl>, per303 <dbl>, per304 <dbl>,
## # per305 <dbl>, per401 <dbl>, per402 <dbl>, per403 <dbl>, per404 <dbl>,
## # per405 <dbl>, per406 <dbl>, per407 <dbl>, per408 <dbl>, per409 <dbl>,
## # per410 <dbl>, per411 <dbl>, per412 <dbl>, per413 <dbl>, per414 <dbl>,
## # per415 <dbl>, per416 <dbl>, per501 <dbl>, per502 <dbl>, per503 <dbl>,
## # per504 <dbl>, per505 <dbl>, per506 <dbl>, per507 <dbl>, per601 <dbl>,
## # per602 <dbl>, per603 <dbl>, per604 <dbl>, per605 <dbl>, per606 <dbl>,
## # per607 <dbl>, per608 <dbl>, per701 <dbl>, per702 <dbl>, per703 <dbl>,
## # per704 <dbl>, per705 <dbl>, per706 <dbl>, per1011 <dbl>,
## # per1012 <dbl>, per1013 <dbl>, per1014 <dbl>, per1015 <dbl>,
## # per1016 <dbl>, per1021 <dbl>, per1022 <dbl>, per1023 <dbl>,
## # per1024 <dbl>, per1025 <dbl>, per1026 <dbl>, per1031 <dbl>,
## # per1032 <dbl>, per1033 <dbl>, per2021 <dbl>, per2022 <dbl>,
## # per2023 <dbl>, per2031 <dbl>, per2032 <dbl>, per2033 <dbl>,
## # per2041 <dbl>, per3011 <dbl>, per3051 <dbl>, per3052 <dbl>,
## # per3053 <dbl>, ...
# bar chart of the number of manifesto records per Swedish party;
# the printed data starts with the 1944 election, so the axis label says 1944
mpds %>%
  filter(countryname == "Sweden") %>%
  count(partyname) %>%
  ggplot(aes(fct_reorder(partyname, n), n)) +
  geom_col() +
  labs(title = "Political manifestos published in Sweden",
       x = NULL,
       y = "Total (1944-present)") +
  coord_flip()

# line chart of the mp_scale() score over election dates for the two
# US party codes (61320 and 61620)
mpds %>%
  filter(party %in% c(61320, 61620)) %>%
  mutate(ideo = mp_scale(.)) %>%
  select(partyname, edate, ideo) %>%
  ggplot(aes(edate, ideo, color = partyname)) +
  geom_line() +
  scale_color_manual(values = c("blue", "red")) +
  labs(title = "Ideological scaling of major US political parties",
       x = "Year",
       y = "Ideological position",
       color = NULL) +
  theme(legend.position = "bottom")

# download documents
(docs <- mp_corpus(countryname == "United States" & edate > as.Date("2012-01-01")))## Connecting to Manifesto Project DB API...
## Connecting to Manifesto Project DB API... corpus version: 2017-1
## Connecting to Manifesto Project DB API...
## Connecting to Manifesto Project DB API... corpus version: 2017-1
## Connecting to Manifesto Project DB API... corpus version: 2017-1
## Connecting to Manifesto Project DB API... corpus version: 2017-1
## <<ManifestoCorpus>>
## Metadata: corpus specific: 0, document level (indexed): 0
## Content: documents: 2
# generate wordcloud of most common terms
docs %>%
  tidy() %>%
  mutate(party = factor(party, levels = c(61320, 61620),
                        labels = c("Democratic Party", "Republican Party"))) %>%
  unnest_tokens(word, text) %>%
  # join on the token column explicitly instead of relying on a natural join
  anti_join(stop_words, by = "word") %>%
  count(party, word, sort = TRUE) %>%
  na.omit() %>%
  reshape2::acast(word ~ party, value.var = "n", fill = 0) %>%
  comparison.cloud(max.words = 200)

# twitteR
library(twitteR)

# .Rprofile method
# setup_twitter_oauth() from the console
setup_twitter_oauth(consumer_key = getOption("twitter_api_key"),
                    consumer_secret = getOption("twitter_api_token"))
## [1] "Using browser based authentication"
tweets <- searchTwitter('#rstats', n = 5)
tweets## [[1]]
## [1] "RStudioJoe: #rstats Thank you @jkregenstein for a year's worth of illuminating posts on \"Reproducible Finance with R\" on R View… https://t.co/fJBMn4zKNT"
##
## [[2]]
## [1] "yuhangx: RT @dalejbarr: Sign up for this @PSstatistics course on #rstats for psych w/me & @luc_bussiere on the banks of Loch Lomond! Apr2018 https:/…"
##
## [[3]]
## [1] "MooresMt: RT @RLangTip: Get started with foreach and parallel programming: https://t.co/192iEfHCKc #rstats"
##
## [[4]]
## [1] "BuzzNicholson: RT @sesync: Want to learn about geospatial data analysis? Apply for short course by Jan 5. #rstats https://t.co/qQssORUXwt https://t.co/VUw…"
##
## [[5]]
## [1] "srharacha: RT @ucfagls: Smoothing discrete spatial data with a Markov random field smoother in #mgcv w #rstats\nhttps://t.co/Tm5Im3tt2z https://t.co/iA…"
clinton <- getUser("hillaryclinton")
clinton$getDescription()## [1] "Wife, mom, grandma, women+kids advocate, FLOTUS, Senator, SecState, hair icon, pantsuit aficionado, 2016 presidential candidate."
clinton$getFriends(n = 5)## $`18622869`
## [1] "ezraklein"
##
## $`2590811666`
## [1] "Color"
##
## $`913945418005958656`
## [1] "TATLGDoc"
##
## $`587536673`
## [1] "GiffordsCourage"
##
## $`14868699`
## [1] "ScaryMommy"
str(tweets)## List of 5
## $ :Reference class 'status' [package "twitteR"] with 17 fields
## ..$ text : chr "#rstats Thank you @jkregenstein for a year's worth of illuminating posts on \"Reproducible Finance with R\" on "| __truncated__
## ..$ favorited : logi FALSE
## ..$ favoriteCount: num 0
## ..$ replyToSN : chr(0)
## ..$ created : POSIXct[1:1], format: "2017-11-10 19:19:17"
## ..$ truncated : logi TRUE
## ..$ replyToSID : chr(0)
## ..$ id : chr "929065944768438272"
## ..$ replyToUID : chr(0)
## ..$ statusSource : chr "<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>"
## ..$ screenName : chr "RStudioJoe"
## ..$ retweetCount : num 0
## ..$ isRetweet : logi FALSE
## ..$ retweeted : logi FALSE
## ..$ longitude : chr(0)
## ..$ latitude : chr(0)
## ..$ urls :'data.frame': 1 obs. of 5 variables:
## .. ..$ url : chr "https://t.co/fJBMn4zKNT"
## .. ..$ expanded_url: chr "https://twitter.com/i/web/status/929065944768438272"
## .. ..$ display_url : chr "twitter.com/i/web/status/9…"
## .. ..$ start_index : num 117
## .. ..$ stop_index : num 140
## ..and 53 methods, of which 39 are possibly relevant:
## .. getCreated, getFavoriteCount, getFavorited, getId, getIsRetweet,
## .. getLatitude, getLongitude, getReplyToSID, getReplyToSN,
## .. getReplyToUID, getRetweetCount, getRetweeted, getRetweeters,
## .. getRetweets, getScreenName, getStatusSource, getText, getTruncated,
## .. getUrls, initialize, setCreated, setFavoriteCount, setFavorited,
## .. setId, setIsRetweet, setLatitude, setLongitude, setReplyToSID,
## .. setReplyToSN, setReplyToUID, setRetweetCount, setRetweeted,
## .. setScreenName, setStatusSource, setText, setTruncated, setUrls,
## .. toDataFrame, toDataFrame#twitterObj
## $ :Reference class 'status' [package "twitteR"] with 17 fields
## ..$ text : chr "RT @dalejbarr: Sign up for this @PSstatistics course on #rstats for psych w/me & @luc_bussiere on the banks"| __truncated__
## ..$ favorited : logi FALSE
## ..$ favoriteCount: num 0
## ..$ replyToSN : chr(0)
## ..$ created : POSIXct[1:1], format: "2017-11-10 19:18:43"
## ..$ truncated : logi FALSE
## ..$ replyToSID : chr(0)
## ..$ id : chr "929065805257551875"
## ..$ replyToUID : chr(0)
## ..$ statusSource : chr "<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>"
## ..$ screenName : chr "yuhangx"
## ..$ retweetCount : num 9
## ..$ isRetweet : logi TRUE
## ..$ retweeted : logi FALSE
## ..$ longitude : chr(0)
## ..$ latitude : chr(0)
## ..$ urls :'data.frame': 0 obs. of 4 variables:
## .. ..$ url : chr(0)
## .. ..$ expanded_url: chr(0)
## .. ..$ dispaly_url : chr(0)
## .. ..$ indices : num(0)
## ..and 53 methods, of which 39 are possibly relevant:
## .. getCreated, getFavoriteCount, getFavorited, getId, getIsRetweet,
## .. getLatitude, getLongitude, getReplyToSID, getReplyToSN,
## .. getReplyToUID, getRetweetCount, getRetweeted, getRetweeters,
## .. getRetweets, getScreenName, getStatusSource, getText, getTruncated,
## .. getUrls, initialize, setCreated, setFavoriteCount, setFavorited,
## .. setId, setIsRetweet, setLatitude, setLongitude, setReplyToSID,
## .. setReplyToSN, setReplyToUID, setRetweetCount, setRetweeted,
## .. setScreenName, setStatusSource, setText, setTruncated, setUrls,
## .. toDataFrame, toDataFrame#twitterObj
## $ :Reference class 'status' [package "twitteR"] with 17 fields
## ..$ text : chr "RT @RLangTip: Get started with foreach and parallel programming: https://t.co/192iEfHCKc #rstats"
## ..$ favorited : logi FALSE
## ..$ favoriteCount: num 0
## ..$ replyToSN : chr(0)
## ..$ created : POSIXct[1:1], format: "2017-11-10 19:18:43"
## ..$ truncated : logi FALSE
## ..$ replyToSID : chr(0)
## ..$ id : chr "929065803672104960"
## ..$ replyToUID : chr(0)
## ..$ statusSource : chr "<a href=\"http://twitter.com/download/android\" rel=\"nofollow\">Twitter for Android</a>"
## ..$ screenName : chr "MooresMt"
## ..$ retweetCount : num 8
## ..$ isRetweet : logi TRUE
## ..$ retweeted : logi FALSE
## ..$ longitude : chr(0)
## ..$ latitude : chr(0)
## ..$ urls :'data.frame': 1 obs. of 5 variables:
## .. ..$ url : chr "https://t.co/192iEfHCKc"
## .. ..$ expanded_url: chr "https://cran.r-project.org/web/packages/doParallel/vignettes/gettingstartedParallel.pdf"
## .. ..$ display_url : chr "cran.r-project.org/web/packages/d…"
## .. ..$ start_index : num 65
## .. ..$ stop_index : num 88
## ..and 53 methods, of which 39 are possibly relevant:
## .. getCreated, getFavoriteCount, getFavorited, getId, getIsRetweet,
## .. getLatitude, getLongitude, getReplyToSID, getReplyToSN,
## .. getReplyToUID, getRetweetCount, getRetweeted, getRetweeters,
## .. getRetweets, getScreenName, getStatusSource, getText, getTruncated,
## .. getUrls, initialize, setCreated, setFavoriteCount, setFavorited,
## .. setId, setIsRetweet, setLatitude, setLongitude, setReplyToSID,
## .. setReplyToSN, setReplyToUID, setRetweetCount, setRetweeted,
## .. setScreenName, setStatusSource, setText, setTruncated, setUrls,
## .. toDataFrame, toDataFrame#twitterObj
## $ :Reference class 'status' [package "twitteR"] with 17 fields
## ..$ text : chr "RT @sesync: Want to learn about geospatial data analysis? Apply for short course by Jan 5. #rstats https://t.co"| __truncated__
## ..$ favorited : logi FALSE
## ..$ favoriteCount: num 0
## ..$ replyToSN : chr(0)
## ..$ created : POSIXct[1:1], format: "2017-11-10 19:15:47"
## ..$ truncated : logi FALSE
## ..$ replyToSID : chr(0)
## ..$ id : chr "929065065982119936"
## ..$ replyToUID : chr(0)
## ..$ statusSource : chr "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>"
## ..$ screenName : chr "BuzzNicholson"
## ..$ retweetCount : num 3
## ..$ isRetweet : logi TRUE
## ..$ retweeted : logi FALSE
## ..$ longitude : chr(0)
## ..$ latitude : chr(0)
## ..$ urls :'data.frame': 1 obs. of 5 variables:
## .. ..$ url : chr "https://t.co/qQssORUXwt"
## .. ..$ expanded_url: chr "https://www.sesync.org/opportunities/short-courses/geospatial-data-analysis-short-course"
## .. ..$ display_url : chr "sesync.org/opportunities/…"
## .. ..$ start_index : num 99
## .. ..$ stop_index : num 122
## ..and 53 methods, of which 39 are possibly relevant:
## .. getCreated, getFavoriteCount, getFavorited, getId, getIsRetweet,
## .. getLatitude, getLongitude, getReplyToSID, getReplyToSN,
## .. getReplyToUID, getRetweetCount, getRetweeted, getRetweeters,
## .. getRetweets, getScreenName, getStatusSource, getText, getTruncated,
## .. getUrls, initialize, setCreated, setFavoriteCount, setFavorited,
## .. setId, setIsRetweet, setLatitude, setLongitude, setReplyToSID,
## .. setReplyToSN, setReplyToUID, setRetweetCount, setRetweeted,
## .. setScreenName, setStatusSource, setText, setTruncated, setUrls,
## .. toDataFrame, toDataFrame#twitterObj
## $ :Reference class 'status' [package "twitteR"] with 17 fields
## ..$ text : chr "RT @ucfagls: Smoothing discrete spatial data with a Markov random field smoother in #mgcv w #rstats\nhttps://t."| __truncated__
## ..$ favorited : logi FALSE
## ..$ favoriteCount: num 0
## ..$ replyToSN : chr(0)
## ..$ created : POSIXct[1:1], format: "2017-11-10 19:15:45"
## ..$ truncated : logi FALSE
## ..$ replyToSID : chr(0)
## ..$ id : chr "929065058088378368"
## ..$ replyToUID : chr(0)
## ..$ statusSource : chr "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>"
## ..$ screenName : chr "srharacha"
## ..$ retweetCount : num 13
## ..$ isRetweet : logi TRUE
## ..$ retweeted : logi FALSE
## ..$ longitude : chr(0)
## ..$ latitude : chr(0)
## ..$ urls :'data.frame': 1 obs. of 5 variables:
## .. ..$ url : chr "https://t.co/Tm5Im3tt2z"
## .. ..$ expanded_url: chr "http://www.fromthebottomoftheheap.net/2017/10/19/first-steps-with-mrf-smooths/"
## .. ..$ display_url : chr "fromthebottomoftheheap.net/2017/10/19/fir…"
## .. ..$ start_index : num 100
## .. ..$ stop_index : num 123
## ..and 53 methods, of which 39 are possibly relevant:
## .. getCreated, getFavoriteCount, getFavorited, getId, getIsRetweet,
## .. getLatitude, getLongitude, getReplyToSID, getReplyToSN,
## .. getReplyToUID, getRetweetCount, getRetweeted, getRetweeters,
## .. getRetweets, getScreenName, getStatusSource, getText, getTruncated,
## .. getUrls, initialize, setCreated, setFavoriteCount, setFavorited,
## .. setId, setIsRetweet, setLatitude, setLongitude, setReplyToSID,
## .. setReplyToSN, setReplyToUID, setRetweetCount, setRetweeted,
## .. setScreenName, setStatusSource, setText, setTruncated, setUrls,
## .. toDataFrame, toDataFrame#twitterObj
df <- twListToDF(tweets) %>%
as_tibble()
df## # A tibble: 5 x 16
## text
## <chr>
## 1 "#rstats Thank you @jkregenstein for a year's worth of illuminating posts o
## 2 RT @dalejbarr: Sign up for this @PSstatistics course on #rstats for psych w
## 3 RT @RLangTip: Get started with foreach and parallel programming: https://t.
## 4 RT @sesync: Want to learn about geospatial data analysis? Apply for short c
## 5 "RT @ucfagls: Smoothing discrete spatial data with a Markov random field sm
## # ... with 15 more variables: favorited <lgl>, favoriteCount <dbl>,
## # replyToSN <lgl>, created <dttm>, truncated <lgl>, replyToSID <lgl>,
## # id <chr>, replyToUID <lgl>, statusSource <chr>, screenName <chr>,
## # retweetCount <dbl>, isRetweet <lgl>, retweeted <lgl>, longitude <lgl>,
## # latitude <lgl>
# twitteR
# omdb API function
# Build the request URL for an OMDb API lookup.
#
# Arguments (each must be a single, non-missing value):
#   Key    - API key, sent as the `apikey` query parameter
#   Title  - sent as `t`
#   Year   - sent as `y`
#   Plot   - sent as `plot`
#   Format - sent as `r`
#
# Returns a length-one character vector: the base URL followed by the
# "&"-separated `name=value` pairs, with every value percent-encoded.
# Stops with an error if any argument is NULL, NA, or not of length one.
omdb <- function(Key, Title, Year, Plot, Format){
  baseurl <- "http://www.omdbapi.com/?"
  params <- c("apikey=", "t=", "y=", "plot=", "r=")
  args <- list(Key, Title, Year, Plot, Format)

  # a NULL (e.g. an unset option) or longer argument would pair values with
  # the wrong parameter names, so reject it up front
  stopifnot(all(lengths(args) == 1L))
  values <- vapply(args, as.character, character(1))
  stopifnot(!anyNA(values))

  # percent-encode so spaces and reserved characters (e.g. "&") inside a
  # value cannot break or alter the query string
  values <- vapply(values, utils::URLencode, character(1),
                   reserved = TRUE, USE.NAMES = FALSE)

  # paste0() is vectorised, so names and values pair up element-wise
  paste0(baseurl, paste0(params, values, collapse = "&"))
}
# use curl to execute the query
request_sharknado <- omdb(getOption("omdb_key"), "Sharknado", "2013", "short", "json")
con <- curl(request_sharknado)
# read the response; `finally` closes the connection even if reading fails
answer_json <- tryCatch(readLines(con), finally = close(con))
# convert to data frame
answer_json %>%
fromJSON() %>%
as_tibble()## Error: Column `Ratings` must be a 1d atomic vector or a list
sharknado <- answer_json %>%
fromJSON()
str(sharknado)## List of 25
## $ Title : chr "Sharknado"
## $ Year : chr "2013"
## $ Rated : chr "TV-14"
## $ Released : chr "11 Jul 2013"
## $ Runtime : chr "86 min"
## $ Genre : chr "Comedy, Horror, Sci-Fi"
## $ Director : chr "Anthony C. Ferrante"
## $ Writer : chr "Thunder Levin"
## $ Actors : chr "Ian Ziering, Tara Reid, John Heard, Cassandra Scerbo"
## $ Plot : chr "When a freak hurricane swamps Los Angeles, nature's deadliest killer rules sea, land, and air as thousands of s"| __truncated__
## $ Language : chr "English"
## $ Country : chr "USA"
## $ Awards : chr "1 win & 2 nominations."
## $ Poster : chr "https://images-na.ssl-images-amazon.com/images/M/MV5BOTE2OTk4MTQzNV5BMl5BanBnXkFtZTcwODUxOTM3OQ@@._V1_SX300.jpg"
## $ Ratings :'data.frame': 2 obs. of 2 variables:
## ..$ Source: chr [1:2] "Internet Movie Database" "Rotten Tomatoes"
## ..$ Value : chr [1:2] "3.3/10" "82%"
## $ Metascore : chr "N/A"
## $ imdbRating: chr "3.3"
## $ imdbVotes : chr "38,948"
## $ imdbID : chr "tt2724064"
## $ Type : chr "movie"
## $ DVD : chr "03 Sep 2013"
## $ BoxOffice : chr "N/A"
## $ Production: chr "NCM Fathom"
## $ Website : chr "http://www.mtivideo.com/TitleView.aspx?TITLE_ID=728"
## $ Response : chr "True"
jsonedit(sharknado, mode = "view", elementId = "sharknado")

library(purrr)
library(repurrrsive)

# show only the first three components at each level of the nested list
str(got_chars, list.len = 3)
## List of 29
## $ :List of 18
## ..$ url : chr "http://www.anapioficeandfire.com/api/characters/1022"
## ..$ id : int 1022
## ..$ name : chr "Theon Greyjoy"
## .. [list output truncated]
## $ :List of 18
## ..$ url : chr "http://www.anapioficeandfire.com/api/characters/1052"
## ..$ id : int 1052
## ..$ name : chr "Tyrion Lannister"
## .. [list output truncated]
## $ :List of 18
## ..$ url : chr "http://www.anapioficeandfire.com/api/characters/1074"
## ..$ id : int 1074
## ..$ name : chr "Victarion Greyjoy"
## .. [list output truncated]
## [list output truncated]
jsonedit(got_chars, mode = "view", elementId = "got_chars")

# extract the "name" element from each of the first four characters
map(got_chars[1:4], "name")
## [[1]]
## [1] "Theon Greyjoy"
##
## [[2]]
## [1] "Tyrion Lannister"
##
## [[3]]
## [1] "Victarion Greyjoy"
##
## [[4]]
## [1] "Will"
# anonymous-function form shown for illustration; evaluating it on its own
# only prints the function
function(x) x[["TEXT"]]

# extract the third element from each of characters 5 to 8
map(got_chars[5:8], 3)
## [[1]]
## [1] "Areo Hotah"
##
## [[2]]
## [1] "Chett"
##
## [[3]]
## [1] "Cressen"
##
## [[4]]
## [1] "Arianne Martell"
# anonymous-function form shown for illustration; evaluating it on its own
# only prints the function
function(x) x[[i]]

# the same extractions written with the pipe
got_chars %>%
  map("name")
got_chars %>%
  map(3)

# map_chr() returns a character vector instead of a list
map_chr(got_chars[9:12], "name")
## [1] "Daenerys Targaryen" "Davos Seaworth" "Arya Stark"
## [4] "Arys Oakheart"
map_chr(got_chars[13:16], 3)## [1] "Asha Greyjoy" "Barristan Selmy" "Varamyr" "Brandon Stark"
# Victarion element
got_chars[[3]]## $url
## [1] "http://www.anapioficeandfire.com/api/characters/1074"
##
## $id
## [1] 1074
##
## $name
## [1] "Victarion Greyjoy"
##
## $gender
## [1] "Male"
##
## $culture
## [1] "Ironborn"
##
## $born
## [1] "In 268 AC or before, at Pyke"
##
## $died
## [1] ""
##
## $alive
## [1] TRUE
##
## $titles
## [1] "Lord Captain of the Iron Fleet" "Master of the Iron Victory"
##
## $aliases
## [1] "The Iron Captain"
##
## $father
## [1] ""
##
## $mother
## [1] ""
##
## $spouse
## [1] ""
##
## $allegiances
## [1] "House Greyjoy of Pyke"
##
## $books
## [1] "A Game of Thrones" "A Clash of Kings" "A Storm of Swords"
##
## $povBooks
## [1] "A Feast for Crows" "A Dance with Dragons"
##
## $tvSeries
## list()
##
## $playedBy
## list()
# specific elements for Victarion
got_chars[[3]][c("name", "culture", "gender", "born")]## $name
## [1] "Victarion Greyjoy"
##
## $culture
## [1] "Ironborn"
##
## $gender
## [1] "Male"
##
## $born
## [1] "In 268 AC or before, at Pyke"
# map() framework
# map(.x, .f, ...)
# .f = `[`
# ... = character vector identifying the names of the elements to extract
# map() framework
x <- map(got_chars, `[`, c("name", "culture", "gender", "born"))
str(x[16:17])## List of 2
## $ :List of 4
## ..$ name : chr "Brandon Stark"
## ..$ culture: chr "Northmen"
## ..$ gender : chr "Male"
## ..$ born : chr "In 290 AC, at Winterfell"
## $ :List of 4
## ..$ name : chr "Brienne of Tarth"
## ..$ culture: chr ""
## ..$ gender : chr "Female"
## ..$ born : chr "In 280 AC"
# magrittr::extract()
library(magrittr)

# same multi-element extraction, using extract() in place of `[`
x <- map(got_chars, extract, c("name", "culture", "gender", "born"))
str(x[18:19])
## List of 2
## $ :List of 4
## ..$ name : chr "Catelyn Stark"
## ..$ culture: chr "Rivermen"
## ..$ gender : chr "Female"
## ..$ born : chr "In 264 AC, at Riverrun"
## $ :List of 4
## ..$ name : chr "Cersei Lannister"
## ..$ culture: chr "Westerman"
## ..$ gender : chr "Female"
## ..$ born : chr "In 266 AC, at Casterly Rock"
map_df(got_chars, extract, c("name", "culture", "gender", "id", "born", "alive"))## # A tibble: 29 x 6
## name culture gender id
## <chr> <chr> <chr> <int>
## 1 Theon Greyjoy Ironborn Male 1022
## 2 Tyrion Lannister Male 1052
## 3 Victarion Greyjoy Ironborn Male 1074
## 4 Will Male 1109
## 5 Areo Hotah Norvoshi Male 1166
## 6 Chett Male 1267
## 7 Cressen Male 1295
## 8 Arianne Martell Dornish Female 130
## 9 Daenerys Targaryen Valyrian Female 1303
## 10 Davos Seaworth Westeros Male 1319
## # ... with 19 more rows, and 2 more variables: born <chr>, alive <lgl>
got_chars %>% {
tibble(
name = map_chr(., "name"),
culture = map_chr(., "culture"),
gender = map_chr(., "gender"),
id = map_int(., "id"),
born = map_chr(., "born"),
alive = map_lgl(., "alive")
)
}## # A tibble: 29 x 6
## name culture gender id
## <chr> <chr> <chr> <int>
## 1 Theon Greyjoy Ironborn Male 1022
## 2 Tyrion Lannister Male 1052
## 3 Victarion Greyjoy Ironborn Male 1074
## 4 Will Male 1109
## 5 Areo Hotah Norvoshi Male 1166
## 6 Chett Male 1267
## 7 Cressen Male 1295
## 8 Arianne Martell Dornish Female 130
## 9 Daenerys Targaryen Valyrian Female 1303
## 10 Davos Seaworth Westeros Male 1319
## # ... with 19 more rows, and 2 more variables: born <chr>, alive <lgl>
# gh_users
# show only the first two components at each level of the nested list
str(gh_repos, list.len = 2)
## List of 6
## $ :List of 30
## ..$ :List of 68
## .. ..$ id : int 61160198
## .. ..$ name : chr "after"
## .. .. [list output truncated]
## ..$ :List of 68
## .. ..$ id : int 40500181
## .. ..$ name : chr "argufy"
## .. .. [list output truncated]
## .. [list output truncated]
## $ :List of 30
## ..$ :List of 68
## .. ..$ id : int 14756210
## .. ..$ name : chr "2013-11_sfu"
## .. .. [list output truncated]
## ..$ :List of 68
## .. ..$ id : int 14152301
## .. ..$ name : chr "2014-01-27-miami"
## .. .. [list output truncated]
## .. [list output truncated]
## [list output truncated]
jsonedit(gh_repos, mode = "view", elementId = "gh_repos")

# for each user, take element 3 of their first repo
gh_repos %>%
  map_chr(c(1, 3))
## [1] "gaborcsardi/after" "jennybc/2013-11_sfu" "jtleek/advdatasci"
## [4] "juliasilge/2016-14" "leeper/ampolcourse" "masalmon/aqi_pdf"
# One row per repository, with variables identifying which GitHub user owns it, the repository name, etc.
# gh_repos
# for each user: first repo -> element 4 -> element 1; parentheses print the result
(unames <- map_chr(gh_repos, c(1, 4, 1)))
## [1] "gaborcsardi" "jennybc" "jtleek" "juliasilge" "leeper"
## [6] "masalmon"
(udf <- gh_repos %>%
set_names(unames) %>%
enframe("username", "gh_repos"))## # A tibble: 6 x 2
## username gh_repos
## <chr> <list>
## 1 gaborcsardi <list [30]>
## 2 jennybc <list [30]>
## 3 jtleek <list [30]>
## 4 juliasilge <list [26]>
## 5 leeper <list [30]>
## 6 masalmon <list [30]>
udf %>%
mutate(n_repos = map_int(gh_repos, length))## # A tibble: 6 x 3
## username gh_repos n_repos
## <chr> <list> <int>
## 1 gaborcsardi <list [30]> 30
## 2 jennybc <list [30]> 30
## 3 jtleek <list [30]> 30
## 4 juliasilge <list [26]> 26
## 5 leeper <list [30]> 30
## 6 masalmon <list [30]> 30
# one_user is a list of repos for one user
one_user <- udf$gh_repos[[1]]
# one_user[[1]] is a list of info for one repo
one_repo <- one_user[[1]]
str(one_repo, max.level = 1, list.len = 5)## List of 68
## $ id : int 61160198
## $ name : chr "after"
## $ full_name : chr "gaborcsardi/after"
## $ owner :List of 17
## $ private : logi FALSE
## [list output truncated]
# a highly selective list of tibble-worthy info for one repo
one_repo[c("name", "fork", "open_issues")]## $name
## [1] "after"
##
## $fork
## [1] FALSE
##
## $open_issues
## [1] 0
# make a data frame of that info for all a user's repos
map_df(one_user, `[`, c("name", "fork", "open_issues"))## # A tibble: 30 x 3
## name fork open_issues
## <chr> <lgl> <int>
## 1 after FALSE 0
## 2 argufy FALSE 6
## 3 ask FALSE 4
## 4 baseimports FALSE 0
## 5 citest TRUE 0
## 6 clisymbols FALSE 0
## 7 cmaker TRUE 0
## 8 cmark TRUE 0
## 9 conditions TRUE 0
## 10 crayon FALSE 7
## # ... with 20 more rows
map_df(one_user, extract, c("name", "fork", "open_issues"))## # A tibble: 30 x 3
## name fork open_issues
## <chr> <lgl> <int>
## 1 after FALSE 0
## 2 argufy FALSE 6
## 3 ask FALSE 4
## 4 baseimports FALSE 0
## 5 citest TRUE 0
## 6 clisymbols FALSE 0
## 7 cmaker TRUE 0
## 8 cmark TRUE 0
## 9 conditions TRUE 0
## 10 crayon FALSE 7
## # ... with 20 more rows
udf %>%
mutate(repo_info = gh_repos %>%
map(. %>%
map_df(extract, c("name", "fork", "open_issues"))))## # A tibble: 6 x 3
## username gh_repos repo_info
## <chr> <list> <list>
## 1 gaborcsardi <list [30]> <tibble [30 x 3]>
## 2 jennybc <list [30]> <tibble [30 x 3]>
## 3 jtleek <list [30]> <tibble [30 x 3]>
## 4 juliasilge <list [26]> <tibble [26 x 3]>
## 5 leeper <list [30]> <tibble [30 x 3]>
## 6 masalmon <list [30]> <tibble [30 x 3]>
# build one row per repository: extract three fields from every repo of every
# user into a nested tibble, drop the raw list-column, then unnest
(rdf <- udf %>%
  mutate(
    repo_info = gh_repos %>%
      map(. %>%
            map_df(extract, c("name", "fork", "open_issues")))
  ) %>%
  select(-gh_repos) %>%
  # name the list-column explicitly; calling unnest() without columns is
  # deprecated in current tidyr
  tidyr::unnest(repo_info))
## # A tibble: 176 x 4
## username name fork open_issues
## <chr> <chr> <lgl> <int>
## 1 gaborcsardi after FALSE 0
## 2 gaborcsardi argufy FALSE 6
## 3 gaborcsardi ask FALSE 4
## 4 gaborcsardi baseimports FALSE 0
## 5 gaborcsardi citest TRUE 0
## 6 gaborcsardi clisymbols FALSE 0
## 7 gaborcsardi cmaker TRUE 0
## 8 gaborcsardi cmark TRUE 0
## 9 gaborcsardi conditions TRUE 0
## 10 gaborcsardi crayon FALSE 7
## # ... with 166 more rows